HW5

Author

Ruixuan Deng

This is the link to my repo.

Problem 1 - OOP

a.

Code
library(Rcpp)

cppFunction('
int GCD(int a, int b) {
    return std::gcd(a, b);
}
int LCM(int a, int b) {
    return abs(a * b) / GCD(a, b);
}
')
Code
##' S4 Class for Rational Numbers
##'
##' The `rational` class represents a rational number with a numerator and denominator.
##'
##' @slot numerator An integer representing the numerator.
##' @slot denominator An integer representing the denominator. Must be non-zero.
setClass(
  "rational",
  slots = c(numerator = "integer", denominator = "integer"),
  validity = function(object) {
    if (object@denominator == 0) {
      return("Denominator cannot be zero.")
    }
    TRUE
  }
)

rational <- function(numerator, denominator = 1L) {
  if (denominator == 0) {
    stop("Denominator cannot be zero.")
  }
  new("rational", numerator = as.integer(numerator), denominator = as.integer(denominator))
}

setMethod("show", "rational", function(object) {
  cat(sprintf("%d/%d\n", object@numerator, object@denominator))
})

simplify <- function(r) {
  gcd <- GCD(r@numerator, r@denominator)
  new("rational",
      numerator = as.integer(r@numerator / gcd),
      denominator = as.integer(r@denominator / gcd))
}

quotient <- function(r, digits = getOption("digits")) {
  value <- r@numerator / r@denominator
  formatted_value <- format(value, digits = digits)
  print(formatted_value)
  invisible(value)
}

setMethod("+", c("rational", "rational"), function(e1, e2) {
  num <- e1@numerator * e2@denominator + e2@numerator * e1@denominator
  denom <- e1@denominator * e2@denominator
  simplify(rational(num, denom))
})

setMethod("-", c("rational", "rational"), function(e1, e2) {
  num <- e1@numerator * e2@denominator - e2@numerator * e1@denominator
  denom <- e1@denominator * e2@denominator
  simplify(rational(num, denom))
})

setMethod("*", c("rational", "rational"), function(e1, e2) {
  num <- e1@numerator * e2@numerator
  denom <- e1@denominator * e2@denominator
  simplify(rational(num, denom))
})

setMethod("/", c("rational", "rational"), function(e1, e2) {
  if (e2@numerator == 0) {
    stop("Division by zero.")
  }
  num <- e1@numerator * e2@denominator
  denom <- e1@denominator * e2@numerator
  simplify(rational(num, denom))
})

b.

Code
r1 <- rational(12, 15)
r2 <- rational(7, 11)
r3 <- rational(0, 8)
Code
r1
12/15
Code
r3
0/8
Code
r1 + r2
79/55
Code
r1 - r2
9/55
Code
r1 * r2
28/55
Code
r1 / r2
44/35
Code
r1 + r3
4/5
Code
r1 * r3
0/1
Code
r2 / r3
Error in r2/r3: Division by zero.
Code
quotient(r1)
[1] "0.8"
Code
quotient(r2)
[1] "0.6363636"
Code
quotient(r2, digits = 3)
[1] "0.636"
Code
quotient(r2, digits = 3.14)
[1] "0.636"
Code
quotient(r2, digits = "avocado")
Warning in prettyNum(.Internal(format(x, trim, digits, nsmall, width, 3L, : NAs
introduced by coercion
Error in prettyNum(.Internal(format(x, trim, digits, nsmall, width, 3L, : invalid value -2147483648 for 'digits' argument
Code
q2 <- quotient(r2, digits = 3)
[1] "0.636"
Code
q2
[1] 0.6363636
Code
quotient(r3)
[1] "0"
Code
simplify(r1)
4/5
Code
simplify(r2)
7/11
Code
simplify(r3)
0/1

c. 

Code
r4 <- rational(4,0)
Error in rational(4, 0): Denominator cannot be zero.
Code
r4 <- rational(4,3,2)
Error in rational(4, 3, 2): unused argument (2)
Code
r4 <- rational('4','3')
r4
4/3
Code
r4 <- rational('4/3')
Warning in initialize(value, ...): NAs introduced by coercion
Code
r4 <- rational(1.5,4.3)
r4
1/4

Problem 2 - plotly

a.

Code
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Code
library(ggplot2)
art_sales <- read.csv("df_for_ml_improved_new_market.csv")
art_sales <- art_sales %>%
  pivot_longer(cols = starts_with("Genre___"), names_to = "Genre", values_to = "Value") %>%
  filter(Value == 1) %>%
  mutate(Genre = str_replace(Genre, "Genre___", "")) %>%
  select(-Value)

ggplot(art_sales, aes(x = factor(year), fill = Genre)) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(title = "Distribution of Genre of Sales Across Years", x = "Year", y = "Proportion of Sales") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

b.

Code
library(plotly)

Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':

    last_plot
The following object is masked from 'package:stats':

    filter
The following object is masked from 'package:graphics':

    layout
Code
library(dplyr)
Code
interactive_data <- art_sales %>%
  group_by(year, Genre) %>%
  summarize(median_price = median(price_usd, na.rm = TRUE), .groups = "drop")
Code
interactive_plot <- plot_ly(
  data = interactive_data,
  x = ~year,
  y = ~median_price,
  color = ~Genre,
  type = 'scatter',
  mode = 'lines+markers',
  hoverinfo = 'text',
  text = ~paste("Year:", year, "<br>Genre:", Genre, "<br>Median Price (USD):", median_price)
) %>%
  layout(
    title = "Sales Price in USD Over Time by Genre",
    xaxis = list(title = "Year"),
    yaxis = list(title = "Median Price (USD)"),
    legend = list(title = list(text = "Genre")),
    hovermode = "closest"
  )

interactive_plot

Problem 3 - data.table

Code
library(data.table)

Attaching package: 'data.table'
The following objects are masked from 'package:lubridate':

    hour, isoweek, mday, minute, month, quarter, second, wday, week,
    yday, year
The following objects are masked from 'package:dplyr':

    between, first, last
The following object is masked from 'package:purrr':

    transpose
Code
library(nycflights13)
Code
flights_dt <- as.data.table(flights)
airports_dt <- as.data.table(airports)
planes_dt <- as.data.table(planes)

a.

Code
departure_delay_dt <- flights_dt[, .(
  mean_departure_delay = mean(dep_delay, na.rm = TRUE),
  median_departure_delay = median(dep_delay, na.rm = TRUE),
  flight_count = .N
), by = .(dest)][flight_count >= 10]

departure_delay_dt <- merge(departure_delay_dt, airports_dt, by.x = "dest", by.y = "faa")
departure_delay_dt <- departure_delay_dt[
  order(-mean_departure_delay), 
  .(name, mean_departure_delay, median_departure_delay)
]

print(departure_delay_dt)
                                    name mean_departure_delay
                                  <char>                <num>
 1:                Columbia Metropolitan            35.570093
 2:                           Tulsa Intl            34.906355
 3:                    Will Rogers World            30.568807
 4:                      Birmingham Intl            29.694853
 5:                        Mc Ghee Tyson            28.493955
 6:                 Jackson Hole Airport            26.545455
 7:                      Des Moines Intl            26.232955
 8:                        Richmond Intl            23.639847
 9:                          Albany Intl            23.620525
10:               Dane Co Rgnl Truax Fld            23.580071
11:               Cherry Capital Airport            22.083333
12:         Theodore Francis Green State            21.765363
13:            Charlottesville-Albemarle            21.391304
14:                      South Bend Rgnl            21.100000
15:          Manchester Regional Airport            21.024678
16:        Akron Canton Regional Airport            20.820878
17:                     San Antonio Intl            20.737463
18:                     Kansas City Intl            20.326477
19:                          Eppley Afld            20.205596
20:                   Gerald R Ford Intl            19.533333
21:    Cincinnati Northern Kentucky Intl            19.520321
22:                          Bangor Intl            19.475000
23:                      Wilmington Intl            19.416667
24:                       Piedmont Triad            19.398000
25: Greenville-Spartanburg International            19.261965
26:                General Mitchell Intl            18.760118
27:                      Sacramento Intl            18.691489
28:                  Chicago Midway Intl            18.589763
29:            Savannah Hilton Head Intl            18.260292
30:                         Bradley Intl            17.720874
31:            Montrose Regional Airport            17.642857
32:                         Norfolk Intl            17.576389
33:              James M Cox Dayton Intl            17.498573
34:                               Yeager            17.000000
35:               Washington Dulles Intl            16.982935
36:                    Jacksonville Intl            16.484434
37:                Portland Intl Jetport            16.455773
38:     Louisville International Airport            16.426141
39:            Baltimore Washington Intl            16.396816
40:                        Portland Intl            16.255935
41:               Greater Rochester Intl            16.243861
42:                Lambert St Louis Intl            16.010557
43:                       Nashville Intl            15.982471
44:                    Myrtle Beach Intl            15.758621
45:                         Memphis Intl            15.658796
46:                        Eagle Co Rgnl            15.495192
47:                          Denver Intl            15.156228
48:                  Charleston Afb Intl            14.694775
49:                Syracuse Hancock Intl            14.444769
50:                      William P Hobby            14.340191
51:     Louis Armstrong New Orleans Intl            14.249463
52:                    Indianapolis Intl            14.046710
53:    Albuquerque International Sunport            13.740157
54:                      Pittsburgh Intl            13.704966
55:                      Burlington Intl            13.605651
56:                   Chicago Ohare Intl            13.570484
57:                             Bob Hope            13.475676
58:                 Buffalo Niagara Intl            13.423951
59:               Cleveland Hopkins Intl            13.387931
60:            Metropolitan Oakland Intl            13.344051
61:             Minneapolis St Paul Intl            13.324806
62:                Austin Bergstrom Intl            13.025641
63:                      Palm Beach Intl            12.992918
64:                   San Francisco Intl            12.866289
65:       Fort Lauderdale Hollywood Intl            12.731104
66:      Hartsfield Jackson Atlanta Intl            12.509824
67:                  Raleigh Durham Intl            12.445870
68:                         Yampa Valley            12.285714
69:                   Port Columbus Intl            12.223787
70:                           Tampa Intl            12.135007
71:                    Philadelphia Intl            11.998709
72:               Detroit Metro Wayne Co            11.812252
73:                       Gallatin Field            11.457143
74:                         Orlando Intl            11.275998
75:                           Long Beach            11.183735
76:                       San Diego Intl            11.110866
77:         George Bush Intercontinental            10.842179
78:                  Seattle Tacoma Intl            10.725922
79:              Phoenix Sky Harbor Intl            10.412926
80:        Ronald Reagan Washington Natl            10.293000
81:        Norman Y Mineta San Jose Intl            10.103659
82:                       Mc Carran Intl             9.418819
83:                     Los Angeles Intl             9.401344
84:                        Honolulu Intl             9.289362
85:               Charlotte Douglas Intl             9.222879
86:                  Salt Lake City Intl             9.026851
87:                           Miami Intl             8.876558
88:   General Edward Lawrence Logan Intl             8.730613
89:               Dallas Fort Worth Intl             8.681909
90:               Southwest Florida Intl             8.275577
91:           Asheville Regional Airport             8.190114
92:            John Wayne Arpt Orange Co             7.761905
93:              Sarasota Bradenton Intl             7.256027
94:                Martha\\\\'s Vineyard             7.051643
95:                 NW Arkansas Regional             6.464886
96:                        Nantucket Mem             6.456604
97:                        Key West Intl             3.647059
98:                    Palm Springs Intl            -2.944444
                                    name mean_departure_delay
    median_departure_delay
                     <num>
 1:                   14.0
 2:                    8.0
 3:                   10.0
 4:                    1.0
 5:                    0.0
 6:                   13.5
 7:                   -1.0
 8:                   -1.0
 9:                    1.0
10:                   -1.0
11:                   -3.0
12:                    0.0
13:                   -2.5
14:                   14.0
15:                    0.0
16:                    0.0
17:                    1.0
18:                   -1.0
19:                   -1.0
20:                   -1.0
21:                   -2.0
22:                   -2.0
23:                   -3.0
24:                   -1.0
25:                   -1.0
26:                    0.0
27:                    2.0
28:                    2.0
29:                   -1.0
30:                   -1.0
31:                    3.0
32:                   -2.0
33:                   -2.0
34:                   -4.0
35:                   -2.0
36:                   -1.0
37:                   -2.0
38:                   -2.0
39:                   -2.0
40:                    1.0
41:                   -2.0
42:                   -1.0
43:                   -1.0
44:                   -1.0
45:                   -1.0
46:                   -1.0
47:                    1.0
48:                   -2.0
49:                   -2.0
50:                    0.0
51:                   -2.0
52:                   -2.0
53:                    0.0
54:                   -2.0
55:                   -2.0
56:                   -2.0
57:                   -1.0
58:                   -2.0
59:                   -2.0
60:                    0.0
61:                   -2.0
62:                   -1.0
63:                    0.0
64:                    0.0
65:                   -1.0
66:                   -2.0
67:                   -2.0
68:                    6.5
69:                   -3.0
70:                   -1.0
71:                   -3.0
72:                   -3.0
73:                    0.0
74:                   -1.0
75:                   -1.0
76:                    0.0
77:                    0.0
78:                   -1.0
79:                   -1.0
80:                   -3.0
81:                   -1.0
82:                   -1.0
83:                   -1.0
84:                   -1.0
85:                   -3.0
86:                   -1.0
87:                   -2.0
88:                   -3.0
89:                   -3.0
90:                   -2.0
91:                   -3.0
92:                   -1.0
93:                   -3.0
94:                   -2.0
95:                   -5.0
96:                   -3.0
97:                    0.0
98:                   -4.0
    median_departure_delay
Code
arrival_delay_dt <- flights_dt[, .(
  mean_arrival_delay = mean(arr_delay, na.rm = TRUE),
  median_arrival_delay = median(arr_delay, na.rm = TRUE),
  flight_count = .N
), by = .(dest)][flight_count >= 10]

arrival_delay_dt <- merge(arrival_delay_dt, airports_dt, by.x = "dest", by.y = "faa")
arrival_delay_dt <- arrival_delay_dt[
  order(-mean_arrival_delay), 
  .(name, mean_arrival_delay, median_arrival_delay)
]

print(arrival_delay_dt)
                                    name mean_arrival_delay
                                  <char>              <num>
 1:                Columbia Metropolitan        41.76415094
 2:                           Tulsa Intl        33.65986395
 3:                    Will Rogers World        30.61904762
 4:                 Jackson Hole Airport        28.09523810
 5:                        Mc Ghee Tyson        24.06920415
 6:               Dane Co Rgnl Truax Fld        20.19604317
 7:                        Richmond Intl        20.11125320
 8:        Akron Canton Regional Airport        19.69833729
 9:                      Des Moines Intl        19.00573614
10:                   Gerald R Ford Intl        18.18956044
11:                      Birmingham Intl        16.87732342
12:         Theodore Francis Green State        16.23463687
13: Greenville-Spartanburg International        15.93544304
14:    Cincinnati Northern Kentucky Intl        15.36456376
15:            Savannah Hilton Head Intl        15.12950601
16:          Manchester Regional Airport        14.78755365
17:                          Eppley Afld        14.69889841
18:                               Yeager        14.67164179
19:                     Kansas City Intl        14.51405836
20:                          Albany Intl        14.39712919
21:                General Mitchell Intl        14.16722038
22:                       Piedmont Triad        14.11260054
23:               Washington Dulles Intl        13.86420212
24:               Cherry Capital Airport        12.96842105
25:              James M Cox Dayton Intl        12.68048606
26:     Louisville International Airport        12.66938406
27:                  Chicago Midway Intl        12.36422360
28:                      Sacramento Intl        12.10992908
29:                    Jacksonville Intl        11.84483416
30:                       Nashville Intl        11.81245891
31:                Portland Intl Jetport        11.66040210
32:               Greater Rochester Intl        11.56064461
33:      Hartsfield Jackson Atlanta Intl        11.30011285
34:                Lambert St Louis Intl        11.07846451
35:                         Norfolk Intl        10.94909344
36:            Baltimore Washington Intl        10.72673385
37:                         Memphis Intl        10.64531435
38:                   Port Columbus Intl        10.60132291
39:                  Charleston Afb Intl        10.59296847
40:                    Philadelphia Intl        10.12719014
41:                  Raleigh Durham Intl        10.05238095
42:                    Indianapolis Intl         9.94043412
43:            Charlottesville-Albemarle         9.50000000
44:               Cleveland Hopkins Intl         9.18161129
45:        Ronald Reagan Washington Natl         9.06695204
46:                      Burlington Intl         8.95099602
47:                 Buffalo Niagara Intl         8.94595186
48:                Syracuse Hancock Intl         8.90392501
49:                          Denver Intl         8.60650021
50:                      Palm Beach Intl         8.56297210
51:                             Bob Hope         8.17567568
52:       Fort Lauderdale Hollywood Intl         8.08212154
53:                          Bangor Intl         8.02793296
54:           Asheville Regional Airport         8.00383142
55:                      Pittsburgh Intl         7.68099053
56:                       Gallatin Field         7.60000000
57:                 NW Arkansas Regional         7.46572581
58:                           Tampa Intl         7.40852503
59:               Charlotte Douglas Intl         7.36031885
60:             Minneapolis St Paul Intl         7.27016886
61:                      William P Hobby         7.17618819
62:                         Bradley Intl         7.04854369
63:                     San Antonio Intl         6.94537178
64:                      South Bend Rgnl         6.50000000
65:     Louis Armstrong New Orleans Intl         6.49017497
66:                        Key West Intl         6.35294118
67:                        Eagle Co Rgnl         6.30434783
68:                Austin Bergstrom Intl         6.01990875
69:                   Chicago Ohare Intl         5.87661475
70:                         Orlando Intl         5.45464309
71:               Detroit Metro Wayne Co         5.42996346
72:                        Portland Intl         5.14157973
73:                        Nantucket Mem         4.85227273
74:                      Wilmington Intl         4.63551402
75:                    Myrtle Beach Intl         4.60344828
76:    Albuquerque International Sunport         4.38188976
77:         George Bush Intercontinental         4.24079040
78:        Norman Y Mineta San Jose Intl         3.44817073
79:               Southwest Florida Intl         3.23814963
80:                       San Diego Intl         3.13916574
81:              Sarasota Bradenton Intl         3.08243131
82:            Metropolitan Oakland Intl         3.07766990
83:   General Edward Lawrence Logan Intl         2.91439222
84:                   San Francisco Intl         2.67289152
85:                         Yampa Valley         2.14285714
86:              Phoenix Sky Harbor Intl         2.09704733
87:            Montrose Regional Airport         1.78571429
88:                     Los Angeles Intl         0.54711094
89:               Dallas Fort Worth Intl         0.32212685
90:                           Miami Intl         0.29905978
91:                       Mc Carran Intl         0.25772849
92:                  Salt Lake City Intl         0.17625459
93:                           Long Beach        -0.06202723
94:                Martha\\\\'s Vineyard        -0.28571429
95:                  Seattle Tacoma Intl        -1.09909910
96:                        Honolulu Intl        -1.36519258
97:            John Wayne Arpt Orange Co        -7.86822660
98:                    Palm Springs Intl       -12.72222222
                                    name mean_arrival_delay
    median_arrival_delay
                   <num>
 1:                 28.0
 2:                 14.0
 3:                 16.0
 4:                 15.0
 5:                  2.0
 6:                  1.0
 7:                  1.0
 8:                  3.0
 9:                  0.0
10:                  1.0
11:                 -2.0
12:                  1.0
13:                 -0.5
14:                 -3.0
15:                 -1.0
16:                 -3.0
17:                 -2.0
18:                 -1.5
19:                  0.0
20:                 -4.0
21:                  0.0
22:                 -2.0
23:                 -3.0
24:                -10.0
25:                 -3.0
26:                 -2.0
27:                 -1.0
28:                  4.0
29:                 -2.0
30:                 -2.0
31:                 -4.0
32:                 -5.0
33:                 -1.0
34:                 -3.0
35:                 -4.0
36:                 -5.0
37:                 -2.5
38:                 -3.0
39:                 -4.0
40:                 -3.0
41:                 -3.0
42:                 -3.0
43:                 -5.0
44:                 -5.0
45:                 -2.0
46:                 -4.0
47:                 -5.0
48:                 -5.0
49:                 -2.0
50:                 -3.0
51:                 -3.0
52:                 -3.0
53:                 -9.0
54:                 -1.0
55:                 -5.0
56:                 -2.0
57:                 -2.0
58:                 -4.0
59:                 -3.0
60:                 -5.0
61:                 -4.0
62:                -10.0
63:                 -9.0
64:                 -3.5
65:                 -6.0
66:                  7.0
67:                 -4.0
68:                 -5.0
69:                 -8.0
70:                 -5.0
71:                 -7.0
72:                 -5.0
73:                 -3.0
74:                 -7.0
75:                -13.0
76:                 -5.5
77:                 -5.0
78:                 -7.0
79:                 -5.0
80:                 -5.0
81:                 -5.0
82:                 -9.0
83:                 -9.0
84:                 -8.0
85:                  2.0
86:                 -6.0
87:                -10.5
88:                 -7.0
89:                 -9.0
90:                 -9.0
91:                 -8.0
92:                 -8.0
93:                -10.0
94:                -11.0
95:                -11.0
96:                 -7.0
97:                -11.0
98:                -13.5
    median_arrival_delay

b.

Code
fastest_aircraft_dt <- flights_dt[
  !is.na(air_time), 
  .(avg_speed = mean(distance / (air_time / 60), na.rm = TRUE), flight_count = .N), 
  by = .(tailnum)
][order(-avg_speed)][1]

fastest_aircraft_dt <- merge(fastest_aircraft_dt, planes_dt, by = "tailnum")
fastest_aircraft_dt <- fastest_aircraft_dt[, .(model, avg_speed, flight_count)]

fastest_aircraft_dt
     model avg_speed flight_count
    <char>     <num>        <int>
1: 777-222  500.8163            1